Load Packages

library(ggplot2)
library(readr)
library(ggmap)
library(maps)
library(stringr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:ggmap':
## 
##     inset
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(rgeos)
## rgeos version: 0.3-23, (SVN revision 546)
##  GEOS runtime version: 3.4.2-CAPI-1.8.2 r3921 
##  Linking to sp version: 1.2-4 
##  Polygon checking: TRUE
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
library(RColorBrewer)

PRISM uses image-based file storage, which means we need to work with two file types:

bil files (band interleaved by lines)

hdr files (associated header file)

Minimum Temperatures

filenames<-list.files("./data/PRISM/min_temp")
filenames<-filenames[grep(".*\\.bil$",filenames)]
filenames<-gsub("PRISM","\\./data/PRISM/min_temp/PRISM",filenames)

require(maptools)
require(raster)

mean2<-function(x){return(mean(x,na.rm=T))}
US_Counties<-readShapePoly("./data/US/US_County_2000_2004_geo")
US_Counties_Polygons<-as(US_Counties,"SpatialPolygons")

#Minimum Temperature
r<-raster(filenames[1])
z<-raster::extract(r,US_Counties_Polygons)
min_temp<-as.data.frame(cbind(rep(2000,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(z,mean2))))
names(min_temp)<-c("year","fips","av")

for (i in 2:16){
  r<-raster(filenames[i])
  this.year<-i+1999
  z<-raster::extract(r,US_Counties_Polygons)
  tmp<-as.data.frame(cbind(rep(this.year,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(z,mean2))))
  names(tmp)<-c("year","fips","av")
  min_temp<-rbind(min_temp,tmp)
}

Maximum Temperatures

filenames<-list.files("./data/PRISM/max_temp")
filenames<-filenames[grep(".*\\.bil$",filenames)]
filenames<-gsub("PRISM","\\./data/PRISM/max_temp/PRISM",filenames)

require(maptools)
require(raster)

mean2<-function(x){return(mean(x,na.rm=T))}
US_Counties<-readShapePoly("./data/US/US_County_2000_2004_geo")
US_Counties_Polygons<-as(US_Counties,"SpatialPolygons")

#Maximum Temperature
s<-raster(filenames[1])
y<-raster::extract(s,US_Counties_Polygons)
max_temp<-as.data.frame(cbind(rep(2000,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(y,mean2))))
names(max_temp)<-c("year","fips","av")

for (i in 2:16){
  s<-raster(filenames[i])
  this.year<-i+1999
  y<-raster::extract(s,US_Counties_Polygons)
  tmp1<-as.data.frame(cbind(rep(this.year,length(US_Counties$FIPSNum)),US_Counties$FIPSNum,unlist(lapply(y,mean2))))
  names(tmp1)<-c("year","fips","av")
  max_temp<-rbind(max_temp,tmp1)
}

Convert dataframes to .csv

write.csv(min_temp,file="MinTemp")
write.csv(max_temp,file="MaxTemp")

Read in additional data

#CDC data
library(readr)
ld<-read_csv("./data/CDC/ld-case-counts-by-county-00-15.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   STNAME = col_character(),
##   CTYNAME = col_character()
## )
## See spec(...) for full column specifications.
#Census
pop<-read_csv("./data/CENSUS/county_population.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   fips = col_character(),
##   areaname = col_character(),
##   state_name = col_character(),
##   county_name = col_character(),
##   fipsst = col_character(),
##   fipsco = col_character()
## )
## See spec(...) for full column specifications.
#Load all PRISM data
load("get_PRISM.Rda")
mintemp<-read.csv("MinTemp")
maxtemp<-read.csv("MaxTemp")

#Load saved dataframes
load("get_DATA.Rda")

#Remove X column
mintemp$X<-NULL
maxtemp$X<-NULL

Pop TidyVerse Conversion

library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(magrittr)
library(dplyr)

pop %<>% select(fips,state_name,county_name,starts_with("pop2"))
pop %<>% gather(starts_with("pop2"),key="str_year",value="size") %>% na.omit
pop %<>% mutate(year=gsub("pop","",str_year))
pop %<>% mutate(year=as.numeric(year))
pop %<>% mutate(fips=gsub("^0","",fips))
pop %<>% mutate(fips=as.integer(fips))
pop$str_year<-NULL
names(pop)[names(pop) == 'size'] <- 'pop'

LD TidyVerse Conversion

ld %<>% gather(starts_with("Cases"),key="str_year",value="cases") 
ld %<>% mutate(year=gsub("Cases","",str_year)) 
ld %<>% mutate(year=as.numeric(year))
ld %<>% rename(state=STNAME,county=CTYNAME)

fips.builder<-function(st,ct){
  if (nchar(ct)==3){
    fips<-paste(as.character(st),as.character(ct),sep="") %>% as.integer
  }
  else if (nchar(ct)==2){
    fips<-paste(as.character(st),"0",as.character(ct),sep="") %>% as.integer
  }
  else {
    fips<-paste(as.character(st),"00",as.character(ct),sep="") %>% as.integer
  }
  return(fips)
}

ld %<>% rowwise() %>% mutate(fips=fips.builder(STCODE,CTYCODE)) 

ld %<>% select(-c(STCODE,CTYCODE,str_year))

Merge all data frames

#Join prism prcp and avtmp with ld 
ld.total<-inner_join(ld,prism)
#Join with mintemp and rename av column to mintmp
ld.total<-inner_join(ld.total,mintemp)
names(ld.total)[names(ld.total) == 'av'] <- 'mintmp'
#Join with maxtemp and rename av column to maxtmp
ld.total<-inner_join(ld.total,maxtemp)
names(ld.total)[names(ld.total) == 'av'] <- 'maxtmp'
#Join with pop
ld.pop<-inner_join(ld.total,pop)
## Joining, by = c("year", "fips")
ld.pop$state_name<-NULL


## CENSES data only goes until 2014, so obs go from 49744 to 46630

countyTick

#Compare countyTick and ld.total county names to ensure congruence for final merge: countyTick contains 3080 obs, so missing 29 counties, rename county and state to County and State

names(ld.total)[names(ld.total) == 'county'] <- 'County'
names(ld.total)[names(ld.total) == 'state'] <- 'State'

countyTick<-read_csv("./data/TICK/countyTick.csv")
#countyTick data frame lists only county name, ld.total lists "County" following county name
ld.total$County<-gsub(" County","",ld.total$County)
ld.total$County<-gsub(" Parish","",ld.total$County)

#Need to remove some unecessary columns, only need first 3 and tick_presence
countyTick$State_FIPS<-NULL
countyTick$County_FIPS<-NULL
countyTick$fipsname<-NULL
countyTick$polyname<-NULL

merging issues

ld.total$SC<-paste(ld.total$County,ld.total$State,sep=",")


# repeats in countyTick - solution:
FIPScounter<-table(countyTick$fips)
FIPScounter<-FIPScounter[FIPScounter>1]
problemFIPS<-as.integer(names(FIPScounter))
problemFIPS.locs<-NULL
for (k in problemFIPS){
  locs<-which(countyTick$fips==k)
  locs<-locs[-1]
  problemFIPS.locs<-c(problemFIPS.locs,locs)
}
countyTick<-countyTick[-problemFIPS.locs,]

## VA cities - TO BE REVISITED: JL "CITY PROBABLY HAS HIGH CASE NUMBERS SO DON"T THROW DATA AWAY IF WE DON"T HAVE TO"
tmp1<-subset(ld.total,year==2008 & State=="Virginia")
tmp2<-subset(countyTick,State=="Virginia")
setdiff(tmp1$County,tmp2$County)
addCity<-setdiff(tmp2$County,tmp1$County)

for (i in 1:dim(countyTick)[1]){
  if (countyTick$State[i]=="Virginia"){
    if (countyTick$County[i] %in% addCity){
      countyTick$County[i]<-paste(countyTick$County[i],"city",sep=" ")
    }
  }
}


#Fixing Inconsistencies
countyTick %<>% mutate(County=str_replace_all(County,"Mountrial","Mountrail"))
countyTick %<>% mutate(County=str_replace_all(County,"Miami Dade","Miami-Dade"))
countyTick %<>% mutate(County=str_replace_all(County,"De Kalb","DeKalb"))
ld.total %<>% mutate(County=str_replace_all(County,"District of Columbia","Washington"))
countyTick %<>% mutate(County=str_replace_all(County,"De Soto","DeSoto"))
ld.total %<>% mutate(County=str_replace_all(County,"De Soto","DeSoto"))
countyTick %<>% mutate(County=str_replace_all(County,"Du Page","DuPage"))
countyTick %<>% mutate(County=str_replace_all(County,"La Salle","LaSalle"))
ld.total %<>% mutate(County=str_replace_all(County,"La Salle","LaSalle"))
countyTick %<>% mutate(County=str_replace_all(County,"Lagrange","LaGrange"))
countyTick %<>% mutate(County=str_replace_all(County,"La Porte","LaPorte"))
countyTick %<>% mutate(County=str_replace_all(County,"LaFourche","Lafourche"))
countyTick %<>% mutate(County=str_replace_all(County,"Prince Georges","Prince George's"))
countyTick %<>% mutate(County=str_replace_all(County,"Queen Annes","Queen Anne's"))
countyTick %<>% mutate(County=str_replace_all(County,"St. Marys","St. Mary's"))
countyTick %<>% mutate(County=str_replace_all(County,"Lac Qui Parle","Lac qui Parle"))
ld.total %<>% mutate(County=str_replace_all(County,"Do\U3e34613ca Ana","Dona Ana"))
countyTick %<>% mutate(County=str_replace_all(County,"La Moure","LaMoure"))
countyTick %<>% mutate(County=str_replace_all(County,"De Witt","DeWitt"))
ld.total %<>% mutate(County=str_replace_all(County,"De Witt","DeWitt"))
countyTick %<>% mutate(County=str_replace_all(County,"Fond Du Lac","Fond du Lac"))

# Adding missing counties to countyTick

countyTick.add<-rbind(countyTick,c(55078,"Menominee","Wisconsin",2))
countyTick.add<-rbind(countyTick.add,c(4012,"La Paz","Arizona",1))
countyTick.add<-rbind(countyTick.add,c(8014,"Broomfield","Colorado",1))
countyTick<-countyTick.add

countyTick$SC<-paste(countyTick$County,countyTick$State,sep=",")
ld.total$SC<-paste(ld.total$County,ld.total$State,sep=",")

# Adding missing cities to countyTick
newCity<-data.frame(fips=c(24510,29510,51510,51515,51520,51530,51540,51550,51570,51580,51590,51595,51600,51610,51620,51630,51640,51660,51670,51678,51680,51683,51685,51690,51720,51730,51735,51740,51750,51760,51770,51775,51790,51820,51830,51840),County=c("Baltimore city","St. Louis city","Alexandria city","Bedford city","Bristol city","Buena Vista city","Charlottesville city","Chesapeake city","Colonial Heights city","Covington city","Danville city","Emporia city","Fairfax city","Falls Church city","Franklin city","Fredericksburg city","Galax city","Harrisonburg city","Hopewell city","Lexington city","Lynchburg city","Manassas city","Manassas Park city","Martinsville city","Norton city","Petersburg city","Poquoson city","Portsmouth city","Radford city","Richmond city","Roanoke city","Salem city","Staunton city","Waynesboro city","Williamsburg city","Winchester city"),State=c("Maryland","Missouri","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia","Virginia"),Tick_presence=c(2,1,2,1,1,2,2,2,2,1,2,1,1,1,2,2,2,2,2,2,2,2,2,1,1,1,2,1,2,2,2,2,2,2,1,2))


newCity$SC<-paste(newCity$County,newCity$State,sep=",")
countyTick<-rbind(newCity,countyTick)

setdiff(ld.total$County,countyTick$County)

# No differences in dataframes

Merge ld.total and countyTick

countyTick$fips<-as.numeric(countyTick$fips)
countyTick$County<-as.character(countyTick$County)
countyTick$State<-as.character(countyTick$State)
countyTick$Tick_presence<-as.numeric(countyTick$Tick_presence)

ld.tick<-inner_join(ld.total,countyTick)

Convert Tick_presence to 0 (1 or 3) or 1 (2 or 4)

ld.tick$Presence_current<-ld.tick$Tick_presence
ld.tick$Presence_current<-ifelse(ld.tick$Presence_current%in%c(1,3),0,1)

Do cases occur only where ticks are present?

ld.tick %>% filter(cases>0) %>% ggplot(aes(x=as.factor(Presence_current),y=cases))+geom_boxplot()+scale_y_log10()+labs(x="Current Tick Presence",y="Cases",title="Do cases occur only where ticks are present?",caption="0=No ticks present || 1=Ticks present")

County_map & ld.tick discrepencies

county_map<-map_data("county")
county_map %<>% mutate(region=str_to_title(region))
county_map %<>% mutate(subregion=str_to_title(subregion))

county_map %<>% mutate(subregion=str_replace_all(subregion,"De Kalb","DeKalb"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Clair","St. Clair"))  
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Francis","St. Francis"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Soto","DeSoto"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Johns","St. Johns"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Lucie","St. Lucie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcduffie","McDuffie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcintosh","McIntosh"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Salle","LaSalle"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdonough","McDonough"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mchenry","McHenry"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mclean","McLean"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lagrange","LaGrange"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Porte","LaPorte"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Joseph","St. Joseph"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Obrien","O'Brien"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcpherson","McPherson"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccracken","McCracken"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccreary","McCreary"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Bernard","St. Bernard"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Charles","St. Charles"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Helena","St. Helena"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St James","St. James"))

county_map %<>% mutate(subregion=str_replace_all(subregion,"St John The Baptist","St. John the Baptist"))

county_map %<>% mutate(subregion=str_replace_all(subregion,"St Landry","St. Landry"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Martin","St. Martin"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Mary","St. Mary"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Tammany","St. Tammany"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Prince Georges","Prince George's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Queen Annes","Queen Anne's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St. Marys","St. Mary's"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Baltimore City","Baltimore city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lac Qui Parle","Lac qui Parle"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lake Of The Woods","Lake of the Woods"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcleod","McLeod"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Louis","St. Louis"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdonald","McDonald"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Francois","St. Francois"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St. Louis City","St. Louis city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Ste Genevieve","Ste. Genevieve"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Lewis And Clark","Lewis and Clark"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccone","McCone"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Yellowstone National","Yellowstone"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckinley","McKinley"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Lawrence","St. Lawrence"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcdowell","McDowell"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"La Moure","LaMoure"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckenzie","McKenzie"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcclain","McClain"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccurtain","McCurtain"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mckean","McKean"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccormick","McCormick"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mccook","McCook"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcminn","McMinn"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcnairy","McNairy"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcculloch","McCulloch"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mclennan","McLennan"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Mcmullen","McMullen"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Isle Of Wight","Isle of Wight"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"King And Queen","King and Queen"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Newport News","Newport News city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Virginia Beach","Virginia Beach city"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Fond Du Lac","Fond du Lac"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"St Croix","St. Croix"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"De Witt","DeWitt"))
county_map %<>% mutate(subregion=str_replace_all(subregion,"Du Page","DuPage"))

#Changing Virginia cities to associated counties

ld.tick.county<-ld.tick

ld.tick.county%<>% mutate(County=str_replace_all(County,"Alexandria city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Bedford city","Bedford"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Bristol city","Washington"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Buena Vista city","Rockbridge"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Charlottesville city","Albemarle"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Chesapeake city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Colonial Heights city","Dinwiddie"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Covington city","Alleghany"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Danville city","Pittsylvania"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Emporia city","Greensville"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Fairfax city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Falls Church city","Fairfax"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Franklin city","Accomack"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Fredericksburg city","Spotsylvania"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Galax city","Carroll"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Hampton city","Hampton"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Harrisonburg city","Rockingham"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Hopewell city","Prince George"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Lexington city","Rockbridge"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Lynchburg city","Campbell"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Manassas city","Prince William"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Manassas Park city","Prince William"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Martinsville city","Henry"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Norfolk city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Norton city","Wise"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Petersburg city","Dinwiddie"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Poquoson city","York"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Portsmouth city","Norfolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Radford city","Montgomery"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Richmond city","Henrico"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Roanoke city","Roanoke"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Salem city","Roanoke"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Staunton city","Augusta"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Suffolk city","Suffolk"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Waynesboro city","Augusta"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Williamsburg city","James City"))
ld.tick.county%<>% mutate(County=str_replace_all(County,"Winchester city","Frederick"))
                                                
ld.tick.county$SC<-paste(ld.tick.county$County,ld.tick.county$State,sep=",")
county_map %<>% mutate(SC=paste(subregion,region,sep=","))

setdiff(county_map$subregion,ld.tick.county$County)
setdiff(ld.tick.county$County,county_map$subregion)

Where do 0 tick presence Lymes Disease cases occur geographically?

ld.av.edit<-aggregate(ld.tick.county$cases,by=list(ld.tick.county$SC,ld.tick.county$Presence_current),FUN=mean)
names(ld.av.edit)<-c("SC","Presence_current","av.cases")
tmp1<-inner_join(county_map,ld.av.edit)
## Joining, by = "SC"
case.0<-subset(tmp1,Presence_current==0)

case.2<-subset(case.0,av.cases>2)
case.1<-subset(case.0,av.cases>1)
case.1<-unique(case.1)

case.2<-unique(case.2)
#[1] "Maricopa,Arizona"          "Pima,Arizona"             
 #[3] "Alameda,California"        "Contra Costa,California"  
 #[5] "Humboldt,California"       "Los Angeles,California"   
 #[7] "Marin,California"          "Mendocino,California"     
 #[9] "Nevada,California"         "Riverside,California"     
#[11] "San Diego,California"      "San Francisco,California" 
#[13] "San Mateo,California"      "Santa Barbara,California" 
#[15] "Santa Clara,California"    "Santa Cruz,California"    
#[17] "Sonoma,California"         "Marion,Indiana"           
#[19] "Starke,Indiana"            "Johnson,Kansas"           
#[21] "Allegan,Michigan"          "Kent,Michigan"            
#[23] "Ottawa,Michigan"           "Washtenaw,Michigan"       
#[25] "Clearwater,Minnesota"      "Lake,Minnesota"           
#[27] "Polk,Minnesota"            "Douglas,Nebraska"         
#[29] "Lancaster,Nebraska"        "Clark,Nevada"             
#[31] "Washoe,Nevada"             "Cass,North Dakota"        
#[33] "Grand Forks,North Dakota"  "Delaware,Ohio"            
#[35] "Franklin,Ohio"             "Hamilton,Ohio"            
#[37] "Montgomery,Ohio"           "Douglas,Oregon"           
#[39] "Jackson,Oregon"            "Josephine,Oregon"         
#[41] "Multnomah,Oregon"          "Allegheny,Pennsylvania"   
#[43] "Armstrong,Pennsylvania"    "Beaver,Pennsylvania"      
#[45] "Butler,Pennsylvania"       "Clarion,Pennsylvania"     
#[47] "Fayette,Pennsylvania"      "Indiana,Pennsylvania"     
#[49] "Lawrence,Pennsylvania"     "Mercer,Pennsylvania"      
#[51] "Somerset,Pennsylvania"     "Washington,Pennsylvania"  
#[53] "Westmoreland,Pennsylvania" "Brown,Texas"              
#[55] "Frederick,Virginia"        "James City,Virginia"      
#[57] "Pulaski,Virginia"          "Rockbridge,Virginia"      
#[59] "Shenandoah,Virginia"       "Warren,Virginia"          
#[61] "King,Washington"          
 
ggplot(tmp1,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(tmp1$Presence_current==0,"red","black"),fill=ifelse(tmp1$av.cases>2,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)+labs(x="Longitude",y="Latitude",title="Average Lyme Disease Cases per County",caption="Red outline= tick presence of 0 || Black outline= tick presence of 1
                                                                                                                                                                                                                                    Gold fill=Average cases >2 || Blue fill= Average cases < 2")

#Counties outlined in red and filled with gold have 0 tick presence but av.cases>2

When do these cases occur?

tmp<-aggregate(ld.tick.county$prcp,by=list(ld.tick.county$SC,ld.tick.county$Presence_current,ld.tick.county$year,ld.tick.county$cases),FUN=mean)
names(tmp)<-c("SC","Presence_current","year","cases","av.prcp")

#2000: Washington Oregon, northeast, Florida and Texas area lacking cases 
tmp2000<-subset(tmp,year==2000)
map.2000<-inner_join(tmp2000,county_map)
## Joining, by = "SC"
ggplot(map.2000,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2000$Presence_current==0,"red","black"),fill=ifelse(map.2000$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2001: some case expansion near Oregon and Texas
tmp2001<-subset(tmp,year==2001)
map.2001<-inner_join(tmp2001,county_map)
## Joining, by = "SC"
ggplot(map.2001,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2001$Presence_current==0,"red","black"),fill=ifelse(map.2001$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2002: same as 2001 with some Northeast expansion
tmp2002<-subset(tmp,year==2002)
map.2002<-inner_join(tmp2002,county_map)
## Joining, by = "SC"
ggplot(map.2002,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2002$Presence_current==0,"red","black"),fill=ifelse(map.2002$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2003: Northeast expansion W-O regression
tmp2003<-subset(tmp,year==2003)
map.2003<-inner_join(tmp2003,county_map)
## Joining, by = "SC"
ggplot(map.2003,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2003$Presence_current==0,"red","black"),fill=ifelse(map.2003$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2004: California and north (Michigan) expansion
tmp2004<-subset(tmp,year==2004)
map.2004<-inner_join(tmp2004,county_map)
## Joining, by = "SC"
ggplot(map.2004,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2004$Presence_current==0,"red","black"),fill=ifelse(map.2004$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2005: same as 2004 except loss of one midwwest county and Texas expansion**
tmp2005<-subset(tmp,year==2005)
map.2005<-inner_join(tmp2005,county_map)
## Joining, by = "SC"
ggplot(map.2005,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2005$Presence_current==0,"red","black"),fill=ifelse(map.2005$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2006: Texas, northern, and California regression** 
tmp2006<-subset(tmp,year==2006)
map.2006<-inner_join(tmp2006,county_map)
## Joining, by = "SC"
ggplot(map.2006,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2006$Presence_current==0,"red","black"),fill=ifelse(map.2006$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2007: Still no Texas, Southwest expansion
tmp2007<-subset(tmp,year==2007)
map.2007<-inner_join(tmp2007,county_map)
## Joining, by = "SC"
ggplot(map.2007,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2007$Presence_current==0,"red","black"),fill=ifelse(map.2007$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2008: Texas expansion, a lot of northwest expansion**
tmp2008<-subset(tmp,year==2008)
map.2008<-inner_join(tmp2008,county_map)
## Joining, by = "SC"
ggplot(map.2008,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2008$Presence_current==0,"red","black"),fill=ifelse(map.2008$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2009: Southern California expasion, Texas maintanence**
tmp2009<-subset(tmp,year==2009)
map.2009<-inner_join(tmp2009,county_map)
## Joining, by = "SC"
ggplot(map.2009,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2009$Presence_current==0,"red","black"),fill=ifelse(map.2009$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2010: Southern Cali regression
tmp2010<-subset(tmp,year==2010)
map.2010<-inner_join(tmp2010,county_map)
## Joining, by = "SC"
ggplot(map.2010,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2010$Presence_current==0,"red","black"),fill=ifelse(map.2010$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2011: Texas regression, Southern Cali and W-O expansion
tmp2011<-subset(tmp,year==2011)
map.2011<-inner_join(tmp2011,county_map)
## Joining, by = "SC"
ggplot(map.2011,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2011$Presence_current==0,"red","black"),fill=ifelse(map.2011$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2012: Northwest and Southern Cali regression
tmp2012<-subset(tmp,year==2012)
map.2012<-inner_join(tmp2012,county_map)
## Joining, by = "SC"
ggplot(map.2012,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2012$Presence_current==0,"red","black"),fill=ifelse(map.2012$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2013: A lot of mid west expansion**
tmp2013<-subset(tmp,year==2013)
map.2013<-inner_join(tmp2013,county_map)
## Joining, by = "SC"
ggplot(map.2013,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2013$Presence_current==0,"red","black"),fill=ifelse(map.2013$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2014: midwest regression
tmp2014<-subset(tmp,year==2014)
map.2014<-inner_join(tmp2014,county_map)
## Joining, by = "SC"
ggplot(map.2014,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2014$Presence_current==0,"red","black"),fill=ifelse(map.2014$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2015: same as 2014
tmp2015<-subset(tmp,year==2015)
map.2015<-inner_join(tmp2015,county_map)
## Joining, by = "SC"
ggplot(map.2015,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2015$Presence_current==0,"red","black"),fill=ifelse(map.2015$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

Labels for counties with case>1 and case>2 and tick presence=0

cnames.1 <- aggregate(cbind(long, lat) ~ SC, data=case.1, FUN=function(x) mean(range(x)))

cnames.2 <- aggregate(cbind(long, lat) ~ SC, data=case.2, FUN=function(x) mean(range(x)))

ggplot(map.2000,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2000$SC %in% cnames.1$SC,"red","black"),fill=ifelse(map.2000$cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#If outlined in red than av.case>1 and tick presence=0

Avg Prcp for important years

Important year transitions: 2005(cases)-2006(loss of cases) 2008-2009: big case years all around *2013: Midwest cases

#2005-2006 case comparison: 1014.887 and 1035.723 are mean of av.prcp for given year; prcp goes below avg in S.Cali & Midwest but above average in Michigan; precipitation influence only in northeast?

ggplot(map.2005,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2005$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2005$av.prcp>1014.887,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

ggplot(map.2006,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2006$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2006$av.prcp>1035.723,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2008-2009: Rainy northeast midwest, dry California

ggplot(map.2008,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2008$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2008$av.prcp>1075.407,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

ggplot(map.2009,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2009$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2009$av.prcp>1178.298,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

#2013: Dry year northwest, rainy for some of midwest (less than 2008-2009)

ggplot(map.2013,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(map.2013$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(map.2013$av.prcp>1132.778,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)

Socioeconomic and deer Data

#County Classifications
class<-read.csv("~/lyme_modeling/data/CENSUS/County Classifications.csv")
#Income
income<-read.csv("~/lyme_modeling/data/CENSUS/Income.csv")
#Jobs
jobs<-read.csv("~/lyme_modeling/data/CENSUS/Jobs.csv")
#Deer
deer<-read.csv("~/lyme_modeling/data/DEER/deer_density.csv")

Jobs Tidyverse conversion: Unemployment Rate 2007-2015

jobs %<>% gather(starts_with("UnempRate"),key="str_year",value="UnemploymentRate") 
jobs %<>% mutate(year=gsub("UnempRate","",str_year)) 
jobs %<>% mutate(year=as.numeric(year))
jobs %<>% select(c(FIPS,State,County,UnemploymentRate,year))

jobs%<>% mutate(State=str_replace_all(State,"AL","Alabama"))
jobs%<>% mutate(State=str_replace_all(State,"AR","Arkansas"))
jobs%<>% mutate(State=str_replace_all(State,"AZ","Arizona"))
jobs%<>% mutate(State=str_replace_all(State,"CA","California"))
jobs%<>% mutate(State=str_replace_all(State,"CO","Colorado"))
jobs%<>% mutate(State=str_replace_all(State,"CT","Connecticut"))
jobs%<>% mutate(State=str_replace_all(State,"DE","Delaware"))
jobs%<>% mutate(State=str_replace_all(State,"FL","Florida"))
jobs%<>% mutate(State=str_replace_all(State,"GA","Georgia"))
jobs%<>% mutate(State=str_replace_all(State,"ID","Idaho"))
jobs%<>% mutate(State=str_replace_all(State,"IL","Illinois"))
jobs%<>% mutate(State=str_replace_all(State,"IN","Indiana"))
jobs%<>% mutate(State=str_replace_all(State,"IA","Iowa"))
jobs%<>% mutate(State=str_replace_all(State,"KS","Kansas"))
jobs%<>% mutate(State=str_replace_all(State,"KY","Kentucky"))
jobs%<>% mutate(State=str_replace_all(State,"LA","Louisiana"))
jobs%<>% mutate(State=str_replace_all(State,"ME","Maine"))
jobs%<>% mutate(State=str_replace_all(State,"MD","Maryland"))
jobs%<>% mutate(State=str_replace_all(State,"MA","Massachusetts"))
jobs%<>% mutate(State=str_replace_all(State,"MI","Michigan"))
jobs%<>% mutate(State=str_replace_all(State,"MN","Minnesota"))
jobs%<>% mutate(State=str_replace_all(State,"MS","Mississippi"))
jobs%<>% mutate(State=str_replace_all(State,"MO","Missouri"))
jobs%<>% mutate(State=str_replace_all(State,"MT","Montana"))
jobs%<>% mutate(State=str_replace_all(State,"NE","Nebraska"))
jobs%<>% mutate(State=str_replace_all(State,"NV","Nevada"))
jobs%<>% mutate(State=str_replace_all(State,"NH","New Hampshire"))
jobs%<>% mutate(State=str_replace_all(State,"NJ","New Jersey"))
jobs%<>% mutate(State=str_replace_all(State,"NM","New Mexico"))
jobs%<>% mutate(State=str_replace_all(State,"NY","New York"))
jobs%<>% mutate(State=str_replace_all(State,"NC","North Carolina"))
jobs%<>% mutate(State=str_replace_all(State,"ND","North Dakota"))
jobs%<>% mutate(State=str_replace_all(State,"OH","Ohio"))
jobs%<>% mutate(State=str_replace_all(State,"OK","Oklahoma"))
jobs%<>% mutate(State=str_replace_all(State,"OR","Oregon"))
jobs%<>% mutate(State=str_replace_all(State,"PA","Pennsylvania"))
jobs%<>% mutate(State=str_replace_all(State,"RI","Rhode Island"))
jobs%<>% mutate(State=str_replace_all(State,"SC","South Carolina"))
jobs%<>% mutate(State=str_replace_all(State,"SD","South Dakota"))
jobs%<>% mutate(State=str_replace_all(State,"TN","Tennessee"))
jobs%<>% mutate(State=str_replace_all(State,"TX","Texas"))
jobs%<>% mutate(State=str_replace_all(State,"UT","Utah"))
jobs%<>% mutate(State=str_replace_all(State,"VT","Vermont"))
jobs%<>% mutate(State=str_replace_all(State,"VA","Virginia"))
jobs%<>% mutate(State=str_replace_all(State,"WA","Washington"))
jobs%<>% mutate(State=str_replace_all(State,"WV","West Virginia"))
jobs%<>% mutate(State=str_replace_all(State,"WI","Wisconsin"))
jobs%<>% mutate(State=str_replace_all(State,"WY","Wyoming"))

jobs$SC<-paste(jobs$County,jobs$State,sep=",")
jobs$FIPS<-as.numeric(jobs$FIPS)
names(jobs)[names(jobs) == 'FIPS'] <- 'fips'

jobs_map<-inner_join(ld.tick.county,jobs) 
## Joining, by = c("State", "County", "year", "fips", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## character vector and factor, coercing into character vector
jobs_map<-jobs_map %>% filter (Presence_current==1) 

Deer conversion

#Deer
deer<-read.csv("~/lyme_modeling/data/DEER/deer_density.csv")
deer$CntyName<-gsub(" County","",deer$CntyName)
names(deer)[names(deer)=='StateName']<-'State'
names(deer)[names(deer)=='CntyName']<-'County'
names(deer)[names(deer) == 'FIPS'] <- 'fips'
deer$SC<-paste(deer$County,deer$State,sep=",")

ld.pres.total<-inner_join(deer,ld.tick.county)
## Joining, by = c("fips", "State", "County", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
ld.jobs.total<-inner_join(deer,jobs_map)
## Joining, by = c("fips", "State", "County", "SC")
## Warning in inner_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector

State unemployment

#Building dataframe of average unemployment rate, average cases, total cases, and total population per state 2007-2015

u<- jobs_map %>% filter(Presence_current==1) %>% group_by(State,year) %>% summarize(av.UR=mean(UnemploymentRate),av.cases=mean(cases),tot.cases=sum(cases))
## Warning: Grouping rowwise data frame strips rowwise nature
#More than 250 cases
u250up <- u %>% filter(tot.cases>=250)

ggplot(u,aes(x=av.UR,y=tot.cases,col=as.factor(year)))+geom_point()

#Adding in population

z <- pop %>% group_by(state_name,year) %>% summarize(tot.pop=sum(pop))
z %<>% rename(State=state_name)

z <- inner_join(z,u)
## Joining, by = c("State", "year")
z %<>% mutate(incidence=(100000*(tot.cases/tot.pop)))

z_big <- z %>% filter(incidence>=20)

Unemployment plots and average case plots for same years; is there any sort of trend to help predict 2000-2006 Unemployment Rate?

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
#All of average cases per sets of 5 States

#"Alabama","Arkansas","Connecticut","Delaware","Florida"
ggplot(subset(u,State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Gerogia","Illinois","Iowa","Louisiana","Maine"
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Maryland","Massachusetts","Michigan","Minnesota","Mississippi"
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Missouri","Nebraska","New Hampshire","New Jesey","New York"
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania"
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Rhode Island","South Carolina","South Dakota","Tennessee","Texas"
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Vermont","Virginia","West Virginia","Wisconsin"
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#All 50 states Unemployment Rate
ggplot(u,aes(x=year,y=av.UR,group=State,colour=State))+geom_line()

#Subsets of 5

#"Alabama","Arkansas","Connecticut","Delaware","Florida"
ggplot(subset(u,State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u, State %in% c("Alabama","Arkansas","Connecticut","Delaware","Florida")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Gerogia","Illinois","Iowa","Louisiana","Maine"
ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Gerogia","Illinois","Iowa","Louisiana","Maine")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Maryland","Massachusetts","Michigan","Minnesota","Mississippi"
ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Maryland","Massachusetts","Michigan","Minnesota","Mississippi")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Missouri","Nebraska","New Hampshire","New Jesey","New York"
ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Missouri","Nebraska","New Hampshire","New Jesey","New York")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania"
ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("North Carolina","North Dakota","Ohio","Oklahoma","Pennsylvania")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Rhode Island","South Carolina","South Dakota","Tennessee","Texas"
ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Rhode Island","South Carolina","South Dakota","Tennessee","Texas")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

#"Vermont","Virginia","West Virginia","Wisconsin"
ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,av.UR,group=State,colour=State))

ggplot(subset(u,State %in% c("Vermont","Virginia","West Virginia","Wisconsin")))+geom_line(aes(year,log10(av.cases),group=State,colour=State))

Univeriate predictor plots

ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2000,stat=identity))

ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2008,stat=identity))

ggplot(ld.pres.total,aes(x=log10(prcp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2009,stat=identity))+geom_smooth(method="lm")

#Deer
ggplot(ld.pres.total,aes(x=(DeerDensityMax),y=log10(cases),group=DeerDensityMax))+geom_boxplot()
## Warning: Removed 34875 rows containing non-finite values (stat_boxplot).

ggplot(ld.pres.total,aes(x=(DeerDensityMode),y=log10(cases),group=DeerDensityMode))+geom_boxplot()
## Warning: Removed 34875 rows containing non-finite values (stat_boxplot).

#Population: As population increase so do cases
ld.pop.edit<-inner_join(ld.pop,ld.pres.total)
## Joining, by = c("State", "County", "cases", "year", "fips", "prcp", "avtemp", "mintmp", "maxtmp", "SC")
ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2005,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2009,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pop.edit,aes(x=log10(pop),y=log10(cases+1)))+geom_point(data=subset(ld.pop.edit,year==2008,stat=identity))+geom_smooth(method="lm")

#Temperature

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2009,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2008,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2005,stat=identity))+geom_smooth(method="lm")

ggplot(ld.pres.total,aes(x=log10(avtemp),y=log10(cases+1)))+geom_point(data=subset(ld.pres.total,year==2006,stat=identity))+geom_smooth(method="lm")

Incidence plots

#Unemployment Rate 
ggplot(z_big,aes(x=av.UR,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

x<-ld.pop.edit %>% group_by(State,year) %>% summarize(tot.cases=sum(cases),av.prcp=mean(prcp),av.temp=mean(avtemp),min.temp=mean(mintmp),max.temp=mean(maxtmp),tot.pop=sum(pop),av.deermax=mean(DeerDensityMax),av.deermode=mean(DeerDensityMode)) %>% mutate(incidence=(100000*(tot.cases/tot.pop)))
## Warning: Grouping rowwise data frame strips rowwise nature
x_big<- x %>% filter(incidence>=20)

#Precipitation: more precipitation, higher incidence; 2012-2013 have mid level rainfall with highest incidence--> result of other variable?
ggplot(x_big,aes(x=av.prcp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

ggplot(x_big,aes(x=av.prcp,y=incidence))+geom_point(data=subset(x_big,year==2012),aes(col=as.factor(State)))+geom_smooth(method=lm)

#Temperature: average max temp has most negative relationship; higher the maximum temperature the lower the incidence 
test<-x_big%>%group_by(year)%>%summarize(tot.cases=sum(tot.cases),av.temp=mean(av.temp),min.temp=mean(min.temp),max.temp=mean(max.temp),av.prcp=mean(av.prcp))

ggplot(x_big,aes(x=av.temp,y=tot.cases))+geom_point(aes(col=as.factor(year)))+geom_smooth()
## `geom_smooth()` using method = 'loess'

ggplot(x_big,aes(x=min.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")

ggplot(x_big,aes(x=max.temp,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth()
## `geom_smooth()` using method = 'loess'

ggplot(test,aes(av.temp,tot.cases))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")+labs(x="Average Temperature",y="Total Cases",title="US Case Counts vs. Average Temperature")

#Deer
ggplot(data=subset(x_big,year==2000:2007),aes(av.deermax,incidence))+geom_boxplot(aes(col=as.factor(year)))

## GGPairs data visualization: from lyme_modeling example

library(GGally)

#Big picture correlations

ggpairs(ld.jobs.total,columns = c("cases","prcp","avtemp","UnemploymentRate"))

#Combine all thinned out dataframes
all_av<-inner_join(x,z)

ggpairs(x,columns=c("tot.cases","av.prcp","av.temp","tot.pop"))

x %<>% mutate(log10totpop=log10(tot.pop))
x %<>% mutate(log10totcases=log10(tot.cases + 1))

ggpairs(x,columns=c("log10totpop","log10totcases","av.prcp","av.temp"))

all_av%<>% mutate(log10totpop=log10(tot.pop))
all_av%<>% mutate(log10totcases=log10(tot.cases + 1))
all_av %<>% mutate(log10in=log10(incidence+1))
ggpairs(all_av,columns=c("log10in","av.prcp","av.UR","av.temp"))

lm_UR_in<-lm(av.UR ~ log10in, data=all_av)
summary(lm_UR_in)

#The modelr package: nesting method of organization is useful for explanatory modeling
by_state<-ld.pop.edit %>% group_by(State)
by_state %<>% nest

#Write a function that takes a data frame as its argument and returns a liner model object that predicts size by year
linGrowth_model<-function(df){
  lm(pop ~ year, data = df)
}

#State-wise statistical modeling exercise
models<-purrr::map(by_state$data,linGrowth_model)

#Add a column to the by_state df, where each row(State) has its own model object
by_state%<>%mutate(model=purrr::map(data,linGrowth_model))

library(modelr)
by_state%<>%mutate(resids=purrr::map2(data,model,add_residuals))

#Write a function that accepts an obj of the type in the resids list and returns a sum of the absolute values

sum_resids<-function(x){
  sum(abs(x$resid))
}
by_state%<>%mutate(totalResid=purrr::map(resids,sum_resids))

#Write a function that accepts a linear model and returns the slope
get_slope<-function(model){
  model$coefficients[2]
}
by_state%<>%mutate(slope=purrr::map(model,get_slope))

#Un-nest residual and slope column so not in list format
slopes<-unnest(by_state,slope)
totalResids<-unnest(by_state,totalResid)

#Plot growth rate for all states
slopes%>%ggplot(aes(State,slope))+geom_point()+theme(axis.text.x = element_text(angle=90,hjust=1))

#Plot total residuals for all states
totalResids%>%ggplot(aes(State,totalResid))+geom_point()+theme(axis.text.x=element_text(angle=90,hjust=1))
##Use different data frame name

by_state2 <- ld.pop.edit%>%group_by(State)%>%nest

#Write a function that accepts an element of the by_state$data list-column and returns the spearman correlation coefficient between Lyme disease cases and precipitation

runCor<-function(df){
  suppressWarnings(cor.test(df$cases,df$prcp,method="spearman")$estimate)
}

by_state2%<>%mutate(spCor=purrr::map(data,runCor))

spCors<-unnest(by_state2,spCor)
spCors%<>%arrange(desc(spCor))
spCors$State<-factor(spCors$State,levels=unique(spCors$State))
ggplot(spCors,aes(State,spCor))+geom_point()+theme(axis.text.x = element_text(angle=90,hjust=1))

Total cases and incidence county-based heat maps

heat<- inner_join(ld.tick.county,pop)
## Joining, by = c("year", "fips")
heat<-heat %<>% group_by(SC) %<>% summarize(tot.cases=sum(cases),tot.pop=sum(pop),av.cases=mean(cases))%<>% mutate(incidence=(100000*(tot.cases/tot.pop)))
## Warning: Grouping rowwise data frame strips rowwise nature
heat<-inner_join(heat,county_map,by="SC")
heat<-heat%<>%filter(tot.cases>0)

##How to change legend name?
ggplot(data=heat,aes(long,lat,group=group))+geom_polygon(data=heat,aes(fill=(tot.cases+1)))+scale_fill_gradientn(colours=rev(rainbow(7)),trans="log10")+labs(x="Longitude",y="Latitude",colours="Total Cases",title="County-based Lyme Disease Case Counts")

ggplot(data=heat,aes(long,lat,group=group))+geom_polygon(data=heat,aes(fill=(incidence+1)))+scale_fill_gradientn(colours=rev(rainbow(7)),trans="log10")+labs(x="Longitude",y="Latitude",colours="Incidence",title="County-based Lyme Disease Incidence")

Try adding terrain to maps

library(ggplot2)
library(ggmap)
library(maps)
library(mapdata)

maptmp<-make_bbox(lat=lat,lon=long,data=heat)
maptmp1<-get_map(location=maptmp,source="google",maptype = "terrain")
ggmap(maptmp1)

GLMM practice

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
#AIC=66805.8 
gm1<-glmer(cases ~ UnemploymentRate + (1|prcp)+ (1|avtemp) ,data=ld.jobs.total,family=poisson)

#AIC=66803.8
gm2<-glmer(cases ~ UnemploymentRate + (1|avtemp) ,data=ld.jobs.total,family=poisson)

#AIC=55666.6
gm3<-glmer(cases ~ UnemploymentRate + (1|State)+ (1|avtemp),data=ld.jobs.total,family=poisson)

#AIC=55618.6-->Best model
gm4<-glmer(cases ~ DeerDensityMax + UnemploymentRate + (1|State)+(1|avtemp),data=ld.jobs.total,family=poisson)

Poster Presentation Outline

## Do cases occur only where ticks are present?
ld.tick %>% filter(cases>0) %>% ggplot(aes(x=as.factor(Presence_current),y=cases))+geom_boxplot()+scale_y_log10()+labs(x="Current Tick Presence",y="Cases",title="Do cases occur only where ticks are present?",caption="0=No ticks present || 1=Ticks present")

## Map visualization of presence spectrum vs. average cases:Counties outlined in red and filled with gold have 0 tick presence but av.cases>1. Average cases set at >1 to avoid zero-inflated case counties

ggplot(tmp1,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(tmp1$Presence_current==0,"red","black"),fill=ifelse(tmp1$av.cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)+labs(x="Longitude",y="Latitude",title="Average Lyme Disease Cases per County",caption="Red outline= tick presence of 0 || Black outline= tick presence of 1
                                                                                                                                                                                                                                    Gold fill=Average cases >1 || Blue fill= Average cases < 1")

## So where exactly are these cases happening?--> From this we can see that the distribution of Lyme Disease occurence seems mostly random, which pushes us to understand the current geographic distribution of Lyme cases in tick present counties to further understand why these cases may be expanding to 
ggplot(heat,aes(x=long,y=lat,group=group))+geom_polygon(col=ifelse(heat$SC %in% cnames.1$SC,"red","gray40"),fill=ifelse(heat$av.cases>1,"gold","powderblue"))+scale_fill_continuous()+coord_map("polyconic")+geom_path(lwd=0.1)+labs(x="Longitude",y="Latitude",title="Counties with Cases>1 where Ticks aren't Present")

## Specifically

#[1] "Maricopa,Arizona"          "Pima,Arizona"             
 #[3] "Alameda,California"        "Contra Costa,California"  
 #[5] "Humboldt,California"       "Los Angeles,California"   
 #[7] "Marin,California"          "Mendocino,California"     
 #[9] "Nevada,California"         "Riverside,California"     
#[11] "San Diego,California"      "San Francisco,California" 
#[13] "San Mateo,California"      "Santa Barbara,California" 
#[15] "Santa Clara,California"    "Santa Cruz,California"    
#[17] "Sonoma,California"         "Marion,Indiana"           
#[19] "Starke,Indiana"            "Johnson,Kansas"           
#[21] "Allegan,Michigan"          "Kent,Michigan"            
#[23] "Ottawa,Michigan"           "Washtenaw,Michigan"       
#[25] "Clearwater,Minnesota"      "Lake,Minnesota"           
#[27] "Polk,Minnesota"            "Douglas,Nebraska"         
#[29] "Lancaster,Nebraska"        "Clark,Nevada"             
#[31] "Washoe,Nevada"             "Cass,North Dakota"        
#[33] "Grand Forks,North Dakota"  "Delaware,Ohio"            
#[35] "Franklin,Ohio"             "Hamilton,Ohio"            
#[37] "Montgomery,Ohio"           "Douglas,Oregon"           
#[39] "Jackson,Oregon"            "Josephine,Oregon"         
#[41] "Multnomah,Oregon"          "Allegheny,Pennsylvania"   
#[43] "Armstrong,Pennsylvania"    "Beaver,Pennsylvania"      
#[45] "Butler,Pennsylvania"       "Clarion,Pennsylvania"     
#[47] "Fayette,Pennsylvania"      "Indiana,Pennsylvania"     
#[49] "Lawrence,Pennsylvania"     "Mercer,Pennsylvania"      
#[51] "Somerset,Pennsylvania"     "Washington,Pennsylvania"  
#[53] "Westmoreland,Pennsylvania" "Brown,Texas"              
#[55] "Frederick,Virginia"        "James City,Virginia"      
#[57] "Pulaski,Virginia"          "Rockbridge,Virginia"      
#[59] "Shenandoah,Virginia"       "Warren,Virginia"          
#[61] "King,Washington"   

## Total Lyme Disease case distribution map
ggplot(data=heat,aes(long,lat,group=group))+geom_polygon(data=heat,aes(fill=(tot.cases+1)))+scale_fill_gradientn(colours=rev(rainbow(7)),trans="log10")+labs(x="Longitude",y="Latitude",colours="Total Cases",title="County-based Lyme Disease Case Counts")

##Lyme Disease incidence distribution map
#Incidence: Allows us to control for influxes in total case count due to differences in population. The number of cases per 100,000 people, if every county had 100,000 people this is how many of them would have contracted Lyme Disease

ggplot(data=heat,aes(long,lat,group=group))+geom_polygon(data=heat,aes(fill=(incidence+1)))+scale_fill_gradientn(colours=rev(rainbow(7)),trans="log10")+labs(x="Longitude",y="Latitude",colours="Incidence",title="County-based Lyme Disease Incidence",caption="Incidence=100,000*(total cases/total population)")

## So what factors can influence Lyme Disease distribution?
test<-x_big%>%group_by(year)%>%summarize(tot.cases=sum(tot.cases),av.temp=mean(av.temp),min.temp=mean(min.temp),max.temp=mean(max.temp),av.prcp=mean(av.prcp),tot.pop=sum(tot.pop))%>%mutate(incidence=(100000*(tot.cases/tot.pop)))

##Does data support previous research on Lyme disease general patterns?
#Temperature (test is annual US averages from States > 250 total cases): Total cases increase with increasing temperature, Incidence decreases with increasing temperature
ggplot(test,aes(av.temp,tot.cases))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")+labs(x="Average Temperature",y="Total Cases",title="US Case Counts vs. Average Temperature")

ggplot(x_big,aes(av.temp,incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")+labs(x="Average Temperature",y="Incidence",title="State Incidence vs. Average Temperature")

#Precipitation:Cases and incidence increase with increased precipitation
ggplot(test,aes(av.prcp,tot.cases))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")+labs(x="Average Precipitation",y="Total Cases",title="US Case Counts vs. Average Precipitation")

ggplot(x_big,aes(av.prcp,incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")+labs(x="Average Presipitation",y="Incidence",title="State Incidence vs. Average Temperature")

##Although we have a general idea of environmental factors afffecting lyme disease, it is still unknown how socioeconic factors and common reservoir species density may affect distribution 

#All 50 states Unemployment Rates follow the same trends
ggplot(u,aes(x=year,y=av.UR,group=State,colour=State))+geom_line()+labs(x="Year",y="Average Unemployment Rate (%)",title="State Annual Unemployment Rates")

#How does incidence relate to average unemployment rate?: As unemployment rate increases,incidence decreases
ggplot(z_big,aes(x=av.UR,y=incidence))+geom_point(aes(col=as.factor(year)))+geom_smooth(method="lm")+labs(x="Average Unemployment Rate",y="Incidence",title="State Average Unemployment vs Incidence")

#How does deer density effect incidence and total cases?

ggplot(x_big,aes(x=av.deermax,y=tot.cases,group=av.deermax))+geom_boxplot()+labs(x="Average Deer Density",y="Total Cases",title="Deer Density effect on State Case Counts")

ggplot(x_big,aes(x=av.deermax,y=incidence,group=av.deermax))+geom_boxplot()+labs(x="Average Deer Density",y="Incidence",title="Deer Density effect on State Case Counts")